package chapter03

import org.apache.spark.{SparkConf, SparkContext}
object Test11_groupby {
  def main(args: Array[String]): Unit = {
    // local[*] runs Spark locally, using as many worker threads as there are cores
    val conf = new SparkConf().setMaster("local[*]").setAppName("groupby")
    val sc = new SparkContext(conf)
    // Group the elements according to a fixed rule
    val rdd = sc.makeRDD(List("Hadoop", "Spark", "HBase", "Scala", "Hive"))
    // Group the words by their first letter
    val grouped = rdd.groupBy(word => word.charAt(0))
    println(grouped.collect().toList)
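    // Aside (illustrative sketch, not part of the original exercise):
    // groupBy above yields RDD[(Char, Iterable[String])], so the size of each
    // group can be computed with mapValues without touching the keys.
    val groupSizes = grouped.mapValues(words => words.size)
    println(groupSizes.collect().toList) // e.g. List((H,3), (S,2)); ordering is not guaranteed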
    // Read a text file and implement wordCount via groupBy
    val lines = sc.textFile("input/word.txt")
    println(lines.flatMap(line => line.split(" "))
      .groupBy(word => word)
      .map { case (word, occurrences) => (word, occurrences.size) }
      .collect().toList)
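    // Aside (illustrative sketch, not part of the original exercise):
    // the same wordCount is more commonly written with reduceByKey, which
    // combines counts on each partition before the shuffle instead of
    // shuffling every word's full group of occurrences.
    println(lines.flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
      .collect().toList)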
    // Release the SparkContext before exiting
    sc.stop()
  }
}
